# -*- coding: utf-8 -*-
import scrapy
import json
import math
from apps.models.data_scrapy import DataQunar
from apps.models.comment_scrapy import CommentQunar
from apps.tools.common_tools import *
from apps.models import OTA

'''
--------- Qunar (去哪儿) spider ---------
1. Data is written via upserts, so a second crawl can fill in anything
   missed by an earlier run.
2. A retry mechanism re-queues a request whenever the anti-scraping
   response is triggered (body is not valid JSON).
'''


class QunarSpider(scrapy.Spider):
    """Crawl sight statistics and user comments from touch.piao.qunar.com.

    Records are upserted (keyed on sight/comment ids) so a rerun can fill
    in anything a previous crawl missed, and any response whose body fails
    JSON parsing — the site's anti-scraping behaviour — is re-queued and
    retried.
    """
    name = 'qunar'
    allowed_domains = ['touch.piao.qunar.com']
    start_urls = [
        'https://touch.piao.qunar.com/touch/queryCommentsAndTravelTips.json?type=mp&pageSize=1&fromType=SIGHT&pageNum=1&sightId=706176810']

    base_url = r'https://touch.piao.qunar.com/touch/queryCommentsAndTravelTips.json?type=mp&pageSize={page_size}&fromType=SIGHT&pageNum={page_num}&sightId={ota_spot_id}'
    ota_spot_ids = OTA.OtaSpotIdMap.get_ota_spot_list(OTA.OtaCode.QUNAR)  # Qunar sight-id list
    page_size = 10  # comments fetched per page when paging the comment list

    def parse(self, response):
        """Issue one single-comment probe request per sight.

        The probe (pageSize=1) is only used by :meth:`parse_data` to read
        the sight-level stats and the total comment count.
        """
        for ota_spot_id in self.ota_spot_ids:
            url = self.base_url.format(ota_spot_id=ota_spot_id, page_num=1, page_size=1)
            yield scrapy.Request(url=url, callback=self.parse_data, dont_filter=True,
                                 meta={'page_num': 1, 'ota_spot_id': ota_spot_id})

    def parse_data(self, response):
        """Upsert sight-level stats, then schedule one request per comment page.

        A body that is not valid JSON means the anti-scraping mechanism
        fired, so the identical request is re-queued.
        """
        self.logger.info('------------start qunar data-------------- %s', response.url)
        try:
            # Keep the try minimal: only json.loads can raise JSONDecodeError.
            result = json.loads(response.body)
        except json.decoder.JSONDecodeError:
            # Anti-scraping triggered: retry the exact same request.
            retry = response.request.copy()
            retry.dont_filter = True
            yield retry
            return

        if not result.get('ret'):  # 'ret' is truthy only when data is usable
            return

        data = result['data']
        sight_id = response.meta['ota_spot_id']
        total = data['total']
        # Upsert so a second crawl updates rather than duplicates the sight row.
        DataQunar.objects(sight_id=sight_id).update_one(
            set__comment_avg_score=data['commentAvgScore'],
            set__tag_list=data['tagList'],
            set__total=total,
            upsert=True
        )
        total_page = math.ceil(total / self.page_size)
        for page_num in range(1, total_page + 1):
            self.logger.info('crawling comment page %d/%d of sight %s',
                             page_num, total_page, sight_id)
            url = self.base_url.format(page_num=page_num, page_size=self.page_size,
                                       ota_spot_id=sight_id)
            yield scrapy.Request(url=url, method='GET',
                                 callback=self.parse_comment, dont_filter=True,
                                 meta={'ota_spot_id': sight_id})

    def parse_comment(self, response):
        """Upsert every comment found on one comment-list page.

        Comments are keyed on (sight_id, comment_id), so reruns update in
        place. Invalid JSON (anti-scraping) re-queues the request.
        """
        try:
            result = json.loads(response.body)
        except json.decoder.JSONDecodeError:
            # Anti-scraping triggered: retry the exact same request.
            retry = response.request.copy()
            retry.dont_filter = True
            yield retry
            return

        if not result.get('ret'):
            return

        sight_id = response.meta['ota_spot_id']
        for comment in result['data']['commentList']:
            CommentQunar.objects(sight_id=sight_id, comment_id=comment['commentId']).update_one(
                set__author=comment['author'],
                set__comment_quality=comment['commentQuality'],
                set__content=comment['content'],
                set__date=comment['date'],
                set__head_img=comment['headImg'],
                set__imgs=comment['imgs'],
                # Optional fields: absent on some comments, so read them with
                # .get (the old setdefault needlessly mutated the parsed payload).
                set__product_id=comment.get('productId', ''),
                set__score=comment['score'],
                set__sight_name=comment['sightName'],
                set__supplier_id=comment.get('supplierId', ''),
                set__supplier_name=comment.get('supplierName', ''),
                set__tag_list=comment['tagList'],
                upsert=True
            )
